1 Aggregated and atomic scores per method

#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout


# datasets = read_yaml("datasets.yml") 
# print(score_file)

# datasets = read_yaml("datasets.yml") 
# datasets = read_yaml(file_dataset) 


# Locate the late-integration score files. Where they are looked up depends on
# the directory the script is run from.
list_wd <- strsplit(getwd(), "/")[[1]]
if (list_wd[length(list_wd)] == "hadaca3_framework") {
  # Snakemake script: the current working dir is hadaca3_framework, so the
  # scores are read from its output folder (every file found there is kept).
  score_files <- list(list.files(path = "./output/scores/", full.names = TRUE))
} else {
  # Nextflow script: the score files are looked up in the current directory.
  # `pattern` is a regular expression, not a shell glob: the previous value
  # 'score-li*' meant "score-l" followed by any number of "i" and therefore
  # matched any file name containing "score-l".
  score_files <- list(list.files(pattern = "^score-li-.*\\.h5$"))
}
# NOTE: `score_files` is a one-element list wrapping the character vector of
# paths; consumers read it as `score_files[[1]]`.


# Empty (zero-row) template for the late-integration results table.
# The first group of columns holds text labels parsed from the score file
# names; the second group holds the numeric scores.
label_columns <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)
metric_columns <- c(
  "aid", "aid_norm",
  "aitchison", "aitchison_norm",
  "jsd", "jsd_norm",
  "mae", "mae_norm",
  "pearson_col", "pearson_col_norm",
  "pearson_row", "pearson_row_norm",
  "pearson_tot", "pearson_tot_norm",
  "rmse", "rmse_norm",
  "score_aggreg",
  "sdid", "sdid_norm",
  "spearman_col", "spearman_col_norm",
  "spearman_row", "spearman_row_norm",
  "spearman_tot", "spearman_tot_norm"
)
results_li <- data.frame(
  c(
    setNames(rep(list(character()), length(label_columns)), label_columns),
    setNames(rep(list(numeric()), length(metric_columns)), metric_columns)
  )
)


# Read every late-integration score file and add one row per file to
# `results_li`: the pipeline labels parsed from the file name, followed by the
# scores stored in the file.
#
# Rows are collected in a preallocated list and bound once at the end, instead
# of calling rbind() on the growing data frame at each iteration (which copies
# the whole table every time).
score_rows <- vector("list", length(score_files[[1]]))
i <- 0
for (score_file in score_files[[1]]) {
  # Extract the base name of the file
  base_name <- basename(score_file)

  # Extract the 15 labels from the file name. The final dot is escaped and the
  # pattern is anchored at the end so that only a literal ".h5" suffix matches.
  # NOTE(review): a base name that does not match yields NA for every label.
  components <- str_match(
    base_name,
    #          dt   ref  OMIC  ppmR fsmR omic ppR fsR omic  ppSR fsSR  deR   omic  ppmM fsmM omic ppM  fsM  deM  li
    "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5$"
  )[2:16]

  scores <- read_hdf5(score_file)

  i <- i + 1
  # One row: labels first, then whatever score columns the file provides.
  score_rows[[i]] <- cbind(
    data.frame(
      dataset = components[1],
      ref = components[2],

      preprocessing_mixRNA = components[3],
      feature_selection_mixRNA = components[4],

      preprocessing_RNA = components[5],
      feature_selection_RNA = components[6],

      preprocessing_scRNA = components[7],
      feature_selection_scRNA = components[8],
      deconvolution_rna = components[9],

      preprocessing_mixMET = components[10],
      feature_selection_mixMET = components[11],

      preprocessing_MET = components[12],
      feature_selection_MET = components[13],
      deconvolution_met = components[14],

      late_integration = components[15],
      stringsAsFactors = FALSE
    ),
    scores
  )
}

# With no score file, do.call() returns NULL and the empty template is kept.
results_li <- rbind(results_li, do.call(rbind, score_rows))
rownames(results_li) <- NULL

# Rank the late-integration methods by the median of their aggregated score,
# best first. The resulting tibble is printed.
arrange(
  summarise(
    group_by(results_li, late_integration),
    GlobalScore = median(score_aggreg)
  ),
  desc(GlobalScore)
)
#> # A tibble: 3 × 2
#>   late_integration GlobalScore
#>   <chr>                  <dbl>
#> 1 OnlyMet                0.663
#> 2 limeanRMSE             0.660
#> 3 OnlyRna                0.646


# Median aggregated score of every distinct pipeline (combination of the 13
# function choices), sorted from best to worst.
#
# `.groups` is an argument of summarise(), not of group_by(): passed to
# group_by() it silently created an extra constant grouping column named
# ".groups" and did not silence the summarise() grouping message.
# "keep" leaves the result grouped by the 13 pipeline columns.
#
# NOTE(review): despite its name, this table is not truncated to five rows
# here; it holds one row per pipeline.
results_li_top5 <- results_li %>%
  group_by(
    preprocessing_mixRNA, feature_selection_mixRNA,
    preprocessing_RNA, feature_selection_RNA,
    preprocessing_scRNA, feature_selection_scRNA, deconvolution_rna,
    preprocessing_mixMET, feature_selection_mixMET,
    preprocessing_MET, feature_selection_MET, deconvolution_met,
    late_integration
  ) %>%
  summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
  arrange(desc(GlobalScore))
#> `summarise()` has grouped output by 'preprocessing_mixRNA',
#> 'feature_selection_mixRNA', 'preprocessing_RNA', 'feature_selection_RNA',
#> 'preprocessing_scRNA', 'feature_selection_scRNA', 'deconvolution_rna',
#> 'preprocessing_mixMET', 'feature_selection_mixMET', 'preprocessing_MET',
#> 'feature_selection_MET', 'deconvolution_met', 'late_integration'. You can
#> override using the `.groups` argument.




# Turn the data-describing columns into factors. unique() keeps the values in
# their order of first appearance, so the levels follow the row order of
# `results_li` (they are not sorted alphabetically).
all_data_used <- c("dataset", "ref")
results_li[all_data_used] <- lapply(
  results_li[all_data_used],
  function(column) factor(column, levels = unique(column))
)



# Names of the 13 pipeline-function columns.
all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# Turn every function column into a factor whose levels are sorted by
# decreasing aggregated score on the in vitro dataset.
#
# The ranking is computed and applied on the same subset of rows. Previously
# order() was computed on the 'invitro1' rows but its positions were used to
# index the full column, which ranked the wrong rows and turned every value
# absent from the first rows into NA.
invitro_rows <- which(results_li$dataset == "invitro1")
invitro_rank <- invitro_rows[
  order(results_li$score_aggreg[invitro_rows], decreasing = TRUE)
]
for (fun in all_functions_li) {
  fun_values <- as.character(results_li[[fun]])
  # Values never seen on the in vitro dataset are appended after the ranked
  # ones so that no observation is lost.
  results_li[[fun]] <- factor(
    fun_values,
    levels = unique(c(fun_values[invitro_rank], fun_values))
  )
}



# Interactive table of the pipeline-function columns and the aggregated score.
index_aggreg <- which(names(results_li) == "score_aggreg")

# The function columns are selected by position: they start at column 3,
# right after the first two columns.
function_cols <- seq_along(all_functions_li) + 2

# 'colvis' button: lets the reader show or hide columns.
colvis_button <- list(
  extend = "colvis",
  text = "Show/Hide Columns",
  columns = ":not(:first-child)"  # every column except the first can be toggled
)

datatable(
  results_li[, c(function_cols, index_aggreg)],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    dom = "Bfrtip",  # This includes the Buttons extension in the layout
    buttons = list(colvis_button)
  )
)

2 Early integration table

#> # A tibble: 0 × 2
#> # ℹ 2 variables: early_integration <chr>, GlobalScore <dbl>

3 Visualisations of the top 5 methods

# Load the ground truth of the dataset used for the visualisations.
test_dataset <- "invitro1"

# Split the dataset name: the trailing digits are stripped to get the name
# part, and every non-digit character is stripped to get the number part.
name_part <- sub("[0-9]+$", "", test_dataset)
number_part <- gsub("[^0-9]", "", test_dataset)

# File name layout: groundtruth<number>_<name>_pdac.h5
ground_truth_name_file <- sprintf(
  "groundtruth%s_%s_pdac.h5", number_part, name_part
)
ground_truth <- read_hdf5(ground_truth_name_file)$groundtruth

print(ground_truth)
#>         COMT02TFZP   COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP   COMT09TFZP
#> endo    0.27930342 0.1346876324 0.15188568 0.15486036 0.13274656 0.1119107593
#> fibro   0.34660545 0.3824721705 0.42652665 0.44660194 0.42781906 0.5567950102
#> immune  0.08661531 0.1346876324 0.11086336 0.09888529 0.09067407 0.0894806285
#> classic 0.03208865 0.3471947859 0.28674866 0.28670742 0.02807638 0.0009595778
#> basal   0.25538717 0.0009577787 0.02397564 0.01294498 0.32068393 0.2408540242
#>         COMT30TFZP COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo    0.22966060 0.09939095  0.1793829 0.12084882  0.1045731  0.1440144
#> fibro   0.48280241 0.43257242  0.4673523 0.49154778  0.5172227  0.5719772
#> immune  0.06290808 0.12538366  0.1225783 0.12288694  0.1080669  0.0379838
#> classic 0.00000000 0.00000000  0.2306864 0.18283179  0.2701372  0.1899790
#> basal   0.22462891 0.34265298  0.0000000 0.08188467  0.0000000  0.0560456
#>         COMT18TFZP COMT01TFZP COMT23TFZP
#> endo    0.15858108 0.29755109 0.13487915
#> fibro   0.44455683 0.38391542 0.45655093
#> immune  0.12039880 0.06208194 0.04193842
#> classic 0.23227251 0.05011260 0.36663150
#> basal   0.04419078 0.20633894 0.00000000


# Build the prediction file name of every pipeline listed in
# `results_li_top5`, then load the predictions.
#
# seq_len() yields an empty sequence when the table has no rows (1:nrow()
# would iterate over c(1, 0)), and vapply() always returns a character vector.
# The dataset name comes from `test_dataset` rather than being repeated as a
# literal, so predictions and ground truth always refer to the same dataset.
prediction_file <- vapply(
  seq_len(nrow(results_li_top5)),
  function(i) {
    # The first 13 columns are the pipeline-function labels.
    pipeline_labels <- results_li_top5[i, 1:13]
    paste0(
      "pred-li-",
      paste(c(test_dataset, "ref", pipeline_labels), collapse = "_"),
      ".h5"
    )
  },
  character(1)
)

# Only the `pred` element of each file is kept.
pred <- lapply(prediction_file, \(path) read_hdf5(path)$pred)

print(pred)
#> [[1]]
#>          COMT02TFZP  COMT05TFZP  COMT13TFZP COMT17TFZP  COMT28TFZP   COMT09TFZP
#> endo    0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro   0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune  0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal   0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#>           COMT30TFZP   COMT11TFZP COMT16TFZP COMT14TFZP  COMT24TFZP  COMT12TFZP
#> endo    0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro   0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune  0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal   0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#>         COMT18TFZP  COMT01TFZP COMT23TFZP
#> endo    0.20550658 0.252505856 0.12826958
#> fibro   0.44919772 0.658817451 0.55497968
#> immune  0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal   0.27232237 0.071922884 0.26864744
#> 
#> [[2]]
#>          COMT02TFZP  COMT05TFZP  COMT13TFZP COMT17TFZP  COMT28TFZP   COMT09TFZP
#> endo    0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro   0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune  0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal   0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#>           COMT30TFZP   COMT11TFZP COMT16TFZP COMT14TFZP  COMT24TFZP  COMT12TFZP
#> endo    0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro   0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune  0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal   0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#>         COMT18TFZP  COMT01TFZP COMT23TFZP
#> endo    0.20550658 0.252505856 0.12826958
#> fibro   0.44919772 0.658817451 0.55497968
#> immune  0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal   0.27232237 0.071922884 0.26864744
#> 
#> [[3]]
#>          COMT02TFZP  COMT05TFZP  COMT13TFZP COMT17TFZP  COMT28TFZP   COMT09TFZP
#> endo    0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro   0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune  0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal   0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#>           COMT30TFZP   COMT11TFZP COMT16TFZP COMT14TFZP  COMT24TFZP  COMT12TFZP
#> endo    0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro   0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune  0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal   0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#>         COMT18TFZP  COMT01TFZP COMT23TFZP
#> endo    0.20550658 0.252505856 0.12826958
#> fibro   0.44919772 0.658817451 0.55497968
#> immune  0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal   0.27232237 0.071922884 0.26864744
#> 
#> [[4]]
#>          COMT02TFZP  COMT05TFZP  COMT13TFZP COMT17TFZP  COMT28TFZP   COMT09TFZP
#> endo    0.212188608 0.150847656 0.127830728 0.17012495 0.168477547 0.1663772111
#> fibro   0.719241791 0.474346124 0.580474900 0.45870095 0.723880347 0.8079226100
#> immune  0.006486137 0.007965471 0.008504063 0.01116058 0.012229579 0.0052679876
#> classic 0.006577870 0.051431954 0.037384023 0.05170145 0.009316453 0.0008474217
#> basal   0.055505595 0.315408795 0.245806286 0.30831207 0.086096073 0.0195847696
#>           COMT30TFZP   COMT11TFZP COMT16TFZP COMT14TFZP  COMT24TFZP  COMT12TFZP
#> endo    0.1948986745 0.1175873835 0.17861790 0.13702482 0.127773504 0.162219257
#> fibro   0.7824100972 0.8581618874 0.54249650 0.66881880 0.600778429 0.627234600
#> immune  0.0057202599 0.0054064206 0.01122914 0.01051585 0.007918624 0.006656194
#> classic 0.0008059356 0.0007668087 0.03752967 0.02574475 0.036322560 0.027877206
#> basal   0.0161650328 0.0180774998 0.23012679 0.15789579 0.227206883 0.176012743
#>         COMT18TFZP  COMT01TFZP COMT23TFZP
#> endo    0.20550658 0.252505856 0.12826958
#> fibro   0.44919772 0.658817451 0.55497968
#> immune  0.02497565 0.006192471 0.00689947
#> classic 0.04799767 0.010561337 0.04120383
#> basal   0.27232237 0.071922884 0.26864744
#> 
#> [[5]]
#>         COMT02TFZP COMT05TFZP COMT13TFZP COMT17TFZP COMT28TFZP  COMT09TFZP
#> endo    0.22169845 0.14793173 0.13284560 0.16195740 0.17076846 0.174322828
#> fibro   0.68567831 0.41853911 0.51528989 0.41147527 0.68671679 0.783467525
#> immune  0.01222161 0.01609737 0.01639415 0.01969765 0.02145217 0.010342698
#> classic 0.01371836 0.07703053 0.06045139 0.07693695 0.01902355 0.003366243
#> basal   0.06668327 0.34040127 0.27501897 0.32993272 0.10203904 0.028500706
#>          COMT30TFZP  COMT11TFZP COMT16TFZP COMT14TFZP COMT24TFZP COMT12TFZP
#> endo    0.207479997 0.123191172 0.17728547 0.13924459 0.13218774 0.16566077
#> fibro   0.753543597 0.835627697 0.49097641 0.61506088 0.53972682 0.57543942
#> immune  0.011279409 0.010764967 0.01999623 0.01871931 0.01572625 0.01363989
#> classic 0.003062516 0.003543112 0.06050784 0.04269593 0.05895694 0.04742543
#> basal   0.024634480 0.026873051 0.25123405 0.18427929 0.25340226 0.19783449
#>         COMT18TFZP COMT01TFZP COMT23TFZP
#> endo    0.19802445 0.24664876 0.13208414
#> fibro   0.39859610 0.63426640 0.49002753
#> immune  0.03554026 0.01338949 0.01421802
#> classic 0.07145061 0.02030218 0.06574376
#> basal   0.29638858 0.08539318 0.29792654

4 Visualisations of the different metrics

4.1 Aggregated scores

4.1.1 PP

4.1.2 FS

4.1.3 DE

4.1.4 LI

4.2 MAE

4.2.1 PP

4.2.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.2.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.2.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3 RMSE

4.3.1 PP

4.3.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.4 Spearman correlation (row)

4.4.1 PP

4.4.2 FS

4.4.3 DE

4.4.4 LI

4.5 Aitchison distance

4.5.1 PP

4.5.2 FS

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.5.3 DE

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.5.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).